# -*- coding: utf-8 -*-
from scpy.logger import get_logger
import os
import sys
import requests
from bs4 import BeautifulSoup
import re

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again
# (it is removed from the sys namespace at interpreter startup) so the
# process-wide default codec for implicit str <-> unicode conversions can be
# switched to UTF-8. Neither call exists in this form on Python 3.
# NOTE(review): crawler_lagou formats a unicode company name into a
# byte-string URL template, which appears to depend on this default --
# confirm before removing the hack.
reload(sys)
sys.setdefaultencoding('utf-8')

# Module-level logger obtained from scpy.logger.get_logger, passing this
# file's path as the argument.
logger = get_logger(__file__)

# Directory part of this file's path, with a trailing "/" appended when it is
# non-empty. os.path.dirname returns '' when __file__ has no directory
# component, in which case CURRENT_PATH stays ''.
CURRENT_PATH = os.path.dirname(__file__)
if CURRENT_PATH:
    CURRENT_PATH = CURRENT_PATH + "/"

# 51job search endpoint that crawler_51job posts its form data to.
searchURL = 'http://search.51job.com/jobsearch/search_result.php'
# lagou job-list URL template; crawler_lagou fills '{}' with the company name.
CIDSEARCH_URL = 'http://www.lagou.com/jobs/list_{}'

def crawler_51job(companyName=u'杭州誉存科技有限公司', timeout=10):
    """Look up a company's numeric id on 51job through its search form.

    Posts ``companyName`` as the search keyword to ``searchURL``, picks the
    second ``div.el`` row inside the ``#resultList`` table of the returned
    page and extracts the digits that follow ``co`` in that row's company
    link.

    Args:
        companyName: Company name to search for. Defaults to the name that
            was previously hard-coded in this function.
        timeout: Seconds to wait for the HTTP response; passed straight to
            ``requests.post`` so a stalled connection cannot hang forever.

    Returns:
        The company id as a string of digits, or ``''`` when the result
        page does not contain the expected markup or link format (the same
        "not found" convention crawler_lagou uses).

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    postData = {
        'stype': 1,
        'lang': 'c',
        'fromType': 102,
        'keywordtype': 1,
        # The keyword is sent GBK-encoded; presumably the 51job search form
        # expects GBK rather than UTF-8 -- verify against the site.
        'keyword': companyName.encode('GBK'),
        'jobarea': '',
    }
    # Subset of the headers from a captured browser request. The other
    # captured headers (Accept-Language, Cache-Control, Connection,
    # Content-Length, Cookie, Pragma, Upgrade-Insecure-Requests) are not sent.
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'search.51job.com',
        'Origin': 'http://jobs.51job.com',
        'Referer': 'http://jobs.51job.com/all/',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
    }
    response = requests.post(searchURL, data=postData, headers=header,
                             timeout=timeout)
    soup = BeautifulSoup(response.content, 'html5lib')
    try:
        resultList = soup.find('div', attrs={'class': 'dw_table', 'id': 'resultList'})
        # Index 1, not 0: the original code skips the first 'el' row
        # (presumably the table header -- TODO confirm).
        row = resultList.find_all('div', {'class': 'el'})[1]
        hrefString = row.find('span', {'class': 't2'}).find('a').attrs['href']
    except (AttributeError, IndexError, KeyError):
        # A find() returned None, there was no second row, or the link has
        # no href: treat all of these as "company not found".
        return ''
    match = re.search(r'co(\d+)', hrefString)
    if match is None:
        return ''
    return match.group(1)


def crawler_lagou(companyName=u'杭州誉存科技有限公司', timeout=10):
    """Look up a company's id on lagou from its job-list page.

    Fetches ``CIDSEARCH_URL`` formatted with ``companyName`` and reads the
    ``data-lg-tj-cid`` attribute of the first ``<a>`` inside the first
    ``li.c_btn`` of the ``ul.item_con_list`` element.

    Args:
        companyName: Company name to search for. Defaults to the name that
            was previously hard-coded in this function.
        timeout: Seconds to wait for the HTTP response; passed straight to
            ``requests.get`` so a stalled connection cannot hang forever.

    Returns:
        The value of the ``data-lg-tj-cid`` attribute, or ``''`` when the
        page does not contain the expected markup.

    Raises:
        requests.exceptions.RequestException: on network failure or timeout.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
    }
    url = CIDSEARCH_URL.format(companyName)
    response = requests.get(url, headers=header, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html5lib')
    try:
        link = soup.find('ul', attrs={'class': 'item_con_list'}).find('li', {'class': 'c_btn'}).find('a')
        return link.attrs['data-lg-tj-cid']
    except (AttributeError, KeyError):
        # AttributeError: one of the find() calls returned None.
        # KeyError: the link exists but lacks the data-lg-tj-cid attribute.
        # Anything else (e.g. KeyboardInterrupt) is no longer swallowed.
        return ''


if __name__ == '__main__':
    # Manual probe: POST one hard-coded company id to lagou's position-search
    # JSON endpoint and dump the raw response body to stdout. The two crawler
    # functions are left disabled here.
    # crawler_51job()
    # crawler_lagou()
    AJAX_URL = 'http://www.lagou.com/gongsi/searchPosition.json'
    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
    postdata = {
        'companyId': 347,
        'positionFirstType': u'全部',
        'pageNo': '20',
        'pageSize': '1000',
    }
    response = requests.post(
        AJAX_URL,
        data=postdata,
        headers={'User-Agent': user_agent},
    )
    # Parenthesised single-argument form prints the same as the statement
    # form under Python 2.
    print(response.content)